Exploring CSW access in Python


In [3]:
from IPython.core.display import HTML
HTML('<iframe src=http://cmgds.marine.usgs.gov/geonetwork/srv/en/main.home width=900 height=280></iframe>')


Out[3]:

In [8]:
from owslib.csw import CatalogueServiceWeb
from owslib import fes

In [5]:
# connect to CSW, explore it's properties
#endpoint = 'http://www.ngdc.noaa.gov/geoportal/csw' # NGDC Geoportal
#endpoint = 'http://www.nodc.noaa.gov/geoportal/csw'   # NODC Geoportal: granule level
#endpoint = 'http://data.nodc.noaa.gov/geoportal/csw'  # NODC Geoportal: collection level
    
#endpoint = 'http://geodiscover.cgdi.ca/wes/serviceManagerCSW/csw'  # NRCAN CUSTOM
#endpoint = 'http://geoport.whoi.edu/gi-cat/services/cswiso' # USGS Woods Hole GI_CAT
#endpoint = 'http://cida.usgs.gov/gdp/geonetwork/srv/en/csw' # USGS CIDA Geonetwork
endpoint = 'http://cmgds.marine.usgs.gov/geonetwork/srv/en/csw' # USGS Coastal and Marine Program
csw = CatalogueServiceWeb(endpoint)
csw.version


Out[5]:
'2.0.2'

In [6]:
[op.name for op in csw.operations]


Out[6]:
['GetCapabilities',
 'DescribeRecord',
 'GetDomain',
 'GetRecords',
 'GetRecordById',
 'Transaction']

In [7]:
bbox=[-141,42,-52,84]
#bbox=[-71.5, 39.5, -63.0, 46]
csw.getrecords(keywords=['sea_water_temperature'],bbox=bbox,maxrecords=20)
#csw.getrecords(keywords=['sea_water_temperature'],maxrecords=20)
csw.results


---------------------------------------------------------------------------
DeprecationWarning                        Traceback (most recent call last)
<ipython-input-7-db5a2ca3f992> in <module>()
      1 bbox=[-141,42,-52,84]
      2 #bbox=[-71.5, 39.5, -63.0, 46]
----> 3 csw.getrecords(keywords=['sea_water_temperature'],bbox=bbox,maxrecords=20)
      4 #csw.getrecords(keywords=['sea_water_temperature'],maxrecords=20)
      5 csw.results

/home/local/python27_epd/lib/python2.7/site-packages/owslib/csw.py in getrecords(self, qtype, keywords, typenames, propertyname, bbox, esn, sortby, outputschema, format, startposition, maxrecords, cql, xml, resulttype)
    186         raise DeprecationWarning("""Please use the updated 'getrecords2' method instead of 'getrecords'.  
    187         The 'getrecords' method will be upgraded to use the 'getrecords2' parameters
--> 188         in a future version of OWSLib.""")
    189 
    190         if xml is not None:

DeprecationWarning: Please use the updated 'getrecords2' method instead of 'getrecords'.  
        The 'getrecords' method will be upgraded to use the 'getrecords2' parameters
        in a future version of OWSLib.

In [65]:
for rec in iteritems(csw.records):
    print rec.abstract


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-65-fe2b7cabe14a> in <module>()
----> 1 for rec in iteritems(csw.records):
      2     print rec.abstract

NameError: name 'iteritems' is not defined

In [58]:
a=csw.records['data/oceansites/DATA/STATION-M/OS_STATION-M-1_194810_D_CTD.nc']

In [60]:
print a.xml


<csw:SummaryRecord xmlns:csw="http://www.opengis.net/cat/csw/2.0.2" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcmiBox="http://dublincore.org/documents/2000/07/11/dcmi-box/" xmlns:dct="http://purl.org/dc/terms/" xmlns:gml="http://www.opengis.net/gml" xmlns:ows="http://www.opengis.net/ows" xmlns:xsd="http://www.w3.org/2001/XMLSchema">
<dc:identifier scheme="urn:x-esri:specification:ServiceType:ArcIMS:Metadata:FileID">data/oceansites/DATA/STATION-M/OS_STATION-M-1_194810_D_CTD.nc</dc:identifier>
<dc:identifier scheme="urn:x-esri:specification:ServiceType:ArcIMS:Metadata:DocID">{1DB52543-50EF-471E-BBFE-A5A87C42EC42}</dc:identifier>
<dc:title>OceanSITES STATION-M in-situ data</dc:title>
<dc:type scheme="urn:x-esri:specification:ServiceType:ArcIMS:Metadata:ContentType">downloadableData</dc:type>
<dc:type scheme="urn:x-esri:specification:ServiceType:ArcIMS:Metadata:ContentType">liveData</dc:type>
<dc:subject>sea_water_temperature</dc:subject>
<dc:subject>sea_water_salinity</dc:subject>
<dc:subject>depth</dc:subject>
<dc:subject>time</dc:subject>
<dc:subject>depth</dc:subject>
<dc:subject>latitude</dc:subject>
<dc:subject>longitude</dc:subject>
<dc:subject>climatologyMeteorologyAtmosphere</dc:subject>
<dct:modified>2013-03-16T02:45:29-06:00</dct:modified>
<dct:abstract>EuroSITES European Ocean Observatory NetworkEU Framework 7 collaborative project contract FP7-ENV-2007-1-202955</dct:abstract>
<dct:abstract>EuroSITES European Ocean Observatory NetworkEU Framework 7 collaborative project contract FP7-ENV-2007-1-202955</dct:abstract>
<dct:references scheme="urn:x-esri:specification:ServiceType:ArcIMS:Metadata:Onlink">http://dods.ndbc.noaa.gov/thredds/dodsC/data/oceansites/DATA/STATION-M/OS_STATION-M-1_194810_D_CTD.nc.html</dct:references>
<dct:references scheme="urn:x-esri:specification:ServiceType:ArcIMS:Metadata:Document">http://www.ngdc.noaa.gov/geoportal/csw?getxml=%7B1DB52543-50EF-471E-BBFE-A5A87C42EC42%7D</dct:references>
<ows:WGS84BoundingBox>
<ows:LowerCorner>-358.2666666507721 66.0</ows:LowerCorner>
<ows:UpperCorner>2.049999952316284 66.16666412353516</ows:UpperCorner>
</ows:WGS84BoundingBox>
<ows:BoundingBox>
<ows:LowerCorner>-358.2666666507721 66.0</ows:LowerCorner>
<ows:UpperCorner>2.049999952316284 66.16666412353516</ows:UpperCorner>
</ows:BoundingBox>
<dc:date>2009-11-01Z</dc:date>
</csw:SummaryRecord>


In [9]:
# get supported result types
csw.getdomain('GetRecords.resultType')
csw.results


---------------------------------------------------------------------------
ExceptionReport                           Traceback (most recent call last)
<ipython-input-9-0d430bef627f> in <module>()
      1 # get supported result types
----> 2 csw.getdomain('GetRecords.resultType')
      3 csw.results

/home/rsignell/epd-7.2-1/lib/python2.7/site-packages/owslib/csw.py in getdomain(self, dname, dtype)
    153         self.request = util.xml2string(etree.tostring(node0))
    154 
--> 155         self._invoke()
    156 
    157         if self.exceptionreport is None:

/home/rsignell/epd-7.2-1/lib/python2.7/site-packages/owslib/csw.py in _invoke(self)
    494         val = self._exml.find(util.nspath_eval('ows:Exception', namespaces))
    495         if val is not None:
--> 496             raise ows.ExceptionReport(self._exml, self.owscommon.namespace)
    497         else:
    498             self.exceptionreport = None

ExceptionReport: 'Not a valid request: GetDomain Valid requests are: GetCapabilities GetRecords GetRecordsSimple DescribeRecord GetRecordById Transaction Harvest GetResource'